In [1]:
# Development setup: load the autoreload extension first, then switch it to
# mode 2 so edited modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
# Append the directory two levels above the working directory to the module
# search path.
# NOTE(review): presumably so a local yellowbrick checkout is importable —
# confirm; appending (rather than inserting at the front) means an installed
# copy found earlier on sys.path would take precedence.
sys.path.append("./../..")
In [2]:
# NOTE(review): %reload_ext expects a module exposing an IPython extension
# entry point — confirm that yellowbrick provides one; otherwise this line only
# emits a warning and can be dropped.
%reload_ext yellowbrick
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.preprocessing import OneHotEncoder
from yellowbrick.contrib.missing import MissingValuesDispersion, MissingValuesBar
from sklearn.datasets import make_classification
In [3]:
# Attribute metadata for the data set, read from a CSV file that sits next to
# the notebook. Its `Attribute` column supplies the column names of the data
# frame loaded in the following cell.
headers = pd.read_csv(filepath_or_buffer="./horse-colic.attrs")
In [4]:
# Horse Colic data set, fetched directly from the UCI Machine Learning Repository.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/horse-colic/horse-colic.data'
# Retrieve Data Set
# - header=None: the raw file has no header row. With the default (header=0)
#   pandas would consume the first record as column labels, and that sample
#   would be silently lost once the real names are assigned below.
# - sep=r"\s+": fields are whitespace-delimited (replaces the deprecated
#   delim_whitespace=True flag).
# - na_values="?": missing entries are encoded as "?"; converting them to NaN
#   at parse time lets numeric columns load as numbers and avoids mutating the
#   frame in place afterwards, so re-running the cell is idempotent.
df = pd.read_csv(url, sep=r"\s+", header=None, na_values="?")
# Column names come from the `Attribute` column of the attribute metadata.
df.columns = headers.Attribute.values
In [5]:
# Separate the target column from the feature columns; `df` itself is left
# untouched because `drop` returns a new frame.
y = df['cp_data']
X = df.drop(columns=['cp_data'])
In [6]:
# Human-readable labels passed to the visualizers below.
# NOTE(review): the order is presumably meant to line up with the sorted values
# of the `cp_data` target — confirm the label/value mapping.
classes = [
    "sick",
    "healthy",
]
In [7]:
# MissingValuesDispersion fitted on the features only (no target supplied),
# then displayed.
viz = MissingValuesDispersion(
    classes=classes,
)
viz.fit(X, y=None)
viz.show()
In [8]:
# Same MissingValuesDispersion visualizer, this time fitted with the target
# `y` supplied alongside the features, then displayed.
viz = MissingValuesDispersion(
    classes=classes,
)
viz.fit(X, y)
viz.show()
In [9]:
# MissingValuesBar fitted on the features only (no target supplied), then
# displayed.
oz = MissingValuesBar(
    classes=classes,
)
oz.fit(X, y=None)
oz.show()
In [10]:
# Same MissingValuesBar visualizer, this time fitted with the target `y`
# supplied alongside the features, then displayed.
oz = MissingValuesBar(
    classes=classes,
)
oz.fit(X, y)
oz.show()
In [ ]: